import os
import jieba
import jieba.posseg as pseg

# Part-of-speech flags that get_vocabulary keeps: a token is collected only
# when its `flag` is in this list. In jieba's ICTCLAS-style tag set these are
# nr = person name, n = common noun, ns = place name, nt = organization name,
# nz = other proper noun.
n_e = ["nr", "n", "ns", "nt", "nz"]
# Directory (relative to the current working directory) that holds the label
# files written by get_vocabulary; it is created on demand.
csv_path = "./labels"
# File that receives a copy of the label file's content after each run.
# NOTE(review): presumably a jieba user dictionary — nothing in this file
# loads it, so confirm against the consumer.
userdict_path = "../userdict.txt"

def get_vocabulary(article_path, cvs_name):
    """Extract noun-like words from every article and record them.

    Each regular file directly inside ``article_path`` is read, segmented
    with jieba's part-of-speech tagger, and the distinct words whose flag is
    listed in ``n_e`` and whose length is at least two characters are
    appended, one per line, to ``csv_path/cvs_name``.  Afterwards the full
    content of that label file is appended to ``userdict_path``.

    Args:
        article_path: Directory containing the article text files.  If it is
            missing or is not a directory the function returns without
            touching the file system.
        cvs_name: File name of the label file inside ``csv_path``.

    Returns:
        None.

    Raises:
        OSError: If a file cannot be opened, read or written.
        UnicodeDecodeError: If an article or the label file is not valid
            UTF-8.
    """
    if not os.path.isdir(article_path):
        return
    # makedirs copes with nested paths and with the directory appearing
    # between a check and the creation.
    os.makedirs(csv_path, exist_ok=True)

    label_file = os.path.join(csv_path, cvs_name)
    noun_flags = frozenset(n_e)  # built once; O(1) membership per token

    def _get_n_list(text):
        """Return the words of *text* tagged with one of the kept flags."""
        return [g.word for g in pseg.lcut(text) if g.flag in noun_flags]

    # All files are handled as UTF-8 explicitly: the text is Chinese and the
    # platform default encoding is not reliable for it.
    with open(label_file, "a", encoding="utf-8") as u:
        # Sorted so the output is reproducible across runs and platforms.
        for article in sorted(os.listdir(article_path)):
            article_file = os.path.join(article_path, article)
            if not os.path.isfile(article_file):
                continue  # sub-directories etc. cannot be read as text
            with open(article_file, "r", encoding="utf-8") as f:
                text = f.read()
            # De-duplicate per article, drop single-character words, and sort
            # because set iteration order is arbitrary.
            words = sorted(w for w in set(_get_n_list(text)) if len(w) >= 2)
            u.writelines(w + "\n" for w in words)

    # NOTE(review): the label file is opened in append mode above, so on a
    # re-run this copies previously recorded words into the user dictionary
    # again.  Kept as-is because the intended semantics are not visible here.
    with open(label_file, "r", encoding="utf-8") as o:
        word = o.read()
    with open(userdict_path, "a", encoding="utf-8") as f:
        f.write(word)

if __name__ == "__main__":
    # Script entry point: harvest vocabulary from the fashion articles into
    # the matching label file.
    get_vocabulary(article_path="./fashion", cvs_name="时尚.csv")
